# ------------------------- #
#    Downweight the data    #
# ------------------------- #

## Clear working space
rm(list = ls())

dir.create("loop_mods", showWarnings = F, recursive = T)
## Library


range01 <- function(x){(x-min(x, na.rm = T))/(max(x, na.rm = T)-min(x, na.rm = T))}

dyad_data <- readRDS("intl_order/network_layers_replication.rds")

dyad_data <- distinct(
  dyad_data, 
  ccode1, ccode2, year, .keep_all = T 
) %>% transform(
  mid = cowmidongoing 
) %>% transform(
  fatal_mid = ifelse(mid == 1 & fatality > 0, 1, 0)
) 

dyad_data$contig[dyad_data$mindist == 0] <- 1

unweighted_data <- distinct(
  dyad_data, 
  ccode1, ccode2, year, .keep_all = T
) %>% mutate(
  major_power = cowmaj1  + cowmaj2,
  joint_major_power = if_else(cowmaj1 == 1 & cowmaj2 == 1, 1, 0),
  joint_region = if_else(region1 == region2, 1, 0)
) %>% dplyr::select(
  c(
    ccode1, ccode2, year, contig, 
    pta, bla, dcaV1, dcaV2,
    dipl, igo, 
    atop_defense, atop_offense, atop_neutral, atop_consul,
    joint_region,
    major_power, joint_major_power, cowc1, cowc2,
    fatal_mid, mid, region1, region2
  )
) %>% reshape2::melt(
  id = c(
    "ccode1", "ccode2", "year", "contig", 
    "joint_region", "cowc1", "cowc2",
    "major_power", "joint_major_power", "mid", "fatal_mid", "region1", "region2"
  )
) %>% dplyr::rename(
  layer = variable
) %>% mutate(
  mid_id = paste0(cowc1, "-", cowc2),
  value = if_else(is.na(value), 0, value),
  region_id = paste0(region1, "-", region2)
) 

unweighted_data$contig[is.na(unweighted_data$contig) & unweighted_data$ccode1 == 626 & unweighted_data$ccode2 %in% c(625, 530, 500, 501, 484, 490)] <- 1
unweighted_data$contig[is.na(unweighted_data$contig) & unweighted_data$ccode2 == 626 & unweighted_data$ccode1 %in% c(625, 530, 500, 501, 484, 490)] <- 1
unweighted_data$contig[is.na(unweighted_data$contig) & unweighted_data$ccode1 == 626 & !(unweighted_data$ccode2 %in% c(625, 530, 500, 501, 484, 490))] <- 0
unweighted_data$contig[is.na(unweighted_data$contig) & unweighted_data$ccode2 == 626 & !(unweighted_data$ccode1 %in% c(625, 530, 500, 501, 484, 490))] <- 0
unweighted_data$contig[unweighted_data$year >= 2011 & unweighted_data$ccode1 == 625 & unweighted_data$ccode2 %in% c(500, 501, 490)] <- 0
unweighted_data$contig[unweighted_data$year >= 2011 & unweighted_data$ccode2 == 625 & unweighted_data$ccode1 %in% c(500, 501, 490)] <- 0
sum(is.na(unweighted_data$contig))


max(subset(unweighted_data, contig == 1)$year)
sum(is.na(unweighted_data$major_power))

unweighted_data$major_power <- factor(
  unweighted_data$major_power,
  levels = c("0", "1", "2")
)

input_data <- unweighted_data
input_data$year_sqr <- input_data$year ^ 2

igo_temp <- filter(
  input_data, 
  grepl("igo", layer)
) 

range01_data <- list()
for (i in 1:length(unique(igo_temp$layer)))
{
  temp_subset <- filter(
    igo_temp,
    layer == unique(igo_temp$layer)[i]
  )
  for (z in 1:length(unique(temp_subset$year)))
  {
    if (any(subset(temp_subset, year == unique(temp_subset$year)[z])$value != 0))
    {
      subset_by_year <- filter(
        temp_subset, 
        year == unique(temp_subset$year)[z]
      )
      
      subset_by_year <- transform(
        subset_by_year,
        value = round(range01(value), 2)
      )
      range01_data[[length(range01_data) + 1]] <- subset_by_year
    }
  }
}

igo_temp <- do.call(rbind, range01_data)

input_data <- filter(
  input_data,
  !grepl("igo", layer)
) %>% bind_rows(
  igo_temp
) %>% mutate(
  layer = as.character(layer)
)



layers_to_check <- data.frame(
  expand.grid(
    c(0, 1),
    c(0, 1),
    c(0, 1),
    c(0, 1),
    c(0, 1),
    c(0, 1),
    c(0, 1),
    c(0, 1),
    c(0, 1),
    c(0, 1)
  )
) 


layers_to_check <- layers_to_check[rowSums(layers_to_check) > 0,]
colnames(layers_to_check) <- c("atop_defense",
                               "atop_offense",
                               "atop_consul",
                               "atop_neutral",
                               "bla", 
                               "dipl", 
                               "igo",
                               "pta", 
                               "dcaV2", 
                               "dcaV1"
)
layers_to_check[is.na(layers_to_check)] <- 0

layers_to_check <- filter(
  layers_to_check,
  !(dcaV1 == 1 & dcaV2 == 1),
  atop_defense + atop_offense + igo > 0
)



formula_list <- list(
  ## Fixed 
  form <- as.formula(value ~ contig + year),
  form <- as.formula(value ~ contig + year + major_power),
  form <- as.formula(value ~ contig + year + year_sqr + major_power),
  form <- as.formula(value ~ contig  | year ),
  form <- as.formula(value ~ contig +  joint_region | year ),
  form <- as.formula(value ~ contig | year + cowc1 + cowc2),
  form <- as.formula(value ~ contig + joint_region | year + cowc1 + cowc2),
  form <- as.formula(value ~ contig + major_power | year + cowc1 + cowc2),
  form <- as.formula(value ~ contig + joint_region + major_power | year + cowc1 + cowc2),
  form <- as.formula(value ~ contig | year + cowc1 + cowc2 + region1 + region2),
  form <- as.formula(value ~ contig + major_power | year + cowc1 + cowc2 + region1 + region2),
  NULL
)


outcome_data <- data.frame(
  matrix(0, ncol = 4, nrow  = 0)
)
colnames(outcome_data) <- c("out_group_fmids", "out_group_mids",
                            "layer_combo", "formula_downweight")






input_data <- filter(
  input_data,
  !is.na(value),
  layer %in% colnames(layers_to_check)
)

input_data$layer <- factor(
  input_data$layer, 
  levels = c(
    sort(unique(input_data$layer))
  )
) 

state_id <- distinct(
  input_data,
  ccode1, cowc1
) %>% dplyr::rename(
  ccode = ccode1, 
  cowc = cowc1
)

state_id <- distinct(
  input_data,
  ccode2, cowc2
) %>% dplyr::rename(
  ccode = ccode2, 
  cowc = cowc2
) %>% bind_rows(
  state_id
) %>% distinct(
  ccode, cowc
)

check_existing_output <- NA
for (forms in 1:length(formula_list))
{
  if (!is.null(formula_list[[forms]]))
  {
    layer_summary <- list()
    for (i in 1:length(unique(input_data$layer)))
    {
      if (!any(grepl("\\(", as.character(formula_list[[forms]]))))
      {
        temp_data <- filter(
          input_data,
          layer == unique(input_data$layer)[i]
        ) %>% distinct(
          ccode1, ccode2, year, .keep_all = T
        ) %>% mutate(
          dyad_id = paste0(ccode1, "-", ccode2)
        )
        
        
        years_unique <- data.table::setDT(temp_data)[,.(n = length(unique(value))), by = 'year']
        years_unique <- data.frame(years_unique)
        temp_data <- filter(
          temp_data,
          year %in% years_unique$year[years_unique$n > 1]
        ) %>% data.frame(.)
        
        
        try({layer_model <- fixest::feols(formula_list[[forms]], data = temp_data)})  
      } 
      temp_data$pred_values <- round(predict(layer_model, newdata = temp_data), 2)
      layer_summary[[i]] <- data.frame(temp_data)
    }
    layer_summary_bind <- do.call(bind_rows, layer_summary)
    layer_summary_bind[is.na(layer_summary_bind)] <- 0
    layer_summary_bind$tie <- layer_summary_bind$value - layer_summary_bind$pred_values
    layer_summary_bind$layer <- as.character(layer_summary_bind$layer)
  } else {
    layer_summary <- list()
    for (nulls in 1:length(unique(input_data$layer)))
    {
      temp_data <- filter(
        input_data,
        layer == unique(input_data$layer)[nulls]
      ) %>% distinct(
        ccode1, ccode2, year, .keep_all = T
      ) %>% mutate(
        dyad_id = paste0(ccode1, "-", ccode2)
      )
      
      
      years_unique <- data.table::setDT(temp_data)[,.(n = length(unique(value))), by = 'year']
      years_unique <- data.frame(years_unique)
      layer_summary[[nulls]] <- filter(
        temp_data,
        year >= min(years_unique$year[years_unique$n > 1])
      ) %>% data.frame(.)
    }
    layer_summary_bind <- do.call(bind_rows, layer_summary)
    layer_summary_bind[is.na(layer_summary_bind)] <- 0
    layer_summary_bind$tie <- layer_summary_bind$value 
    layer_summary_bind$layer <- as.character(layer_summary_bind$layer)
  }
  
  
  for (layers in 1:nrow(layers_to_check))
  {
    gc()
    downweighted_data <- filter(
      layer_summary_bind, 
      layer %in%
        c(
          as.character(colnames(layers_to_check)[layers_to_check[layers,] == 1])
        )
    ) %>% group_by(
      year
    ) %>% mutate(
      n = length(unique(layer))
    ) %>% ungroup(
      .
    ) %>% mutate(
      tie = round(tie, 2)
    )
    
    downweighted_data <- data.table::setDT(downweighted_data)[,.(tie = mean(tie)), by = 'ccode1,ccode2,year']
    downweighted_data <- left_join(
      downweighted_data,
      state_id,
      by = c(
        "ccode1" = "ccode"
      )
    ) %>% left_join(
      state_id,
      by = c(
        "ccode2" = "ccode"
      )
    ) %>% dplyr::rename(
      namea = cowc.x, 
      nameb = cowc.y
    ) %>% dplyr::select(
      namea, nameb, year, tie
    ) %>% data.frame(.)
    
    
    adjacency_matricies <- list()
    for (v in min(layer_summary_bind$year):max(layer_summary_bind$year))
    {
      temp_edgelist <- filter(
        downweighted_data,
        year == v
      )
      
      temp_edgelist <- arrange(
        temp_edgelist,
        namea, nameb
      ) %>% distinct(
        namea, nameb, tie
      )
      
      temp_edgelist$tie[temp_edgelist$tie < 0] <- 0
      
      if (nrow(temp_edgelist) > 0)
      {
        temp_net <- igraph::graph_from_edgelist(as.matrix(temp_edgelist[,c(1:2)]), directed = F)
        igraph::E(temp_net)$weight <- round(temp_edgelist$tie)
        
        temp_adj <- igraph::as_adjacency_matrix(temp_net, attr="weight", sparse = T)
        temp_adj <- as.matrix(temp_adj)
        temp_adj <- temp_adj[order(rownames(temp_adj)),order(colnames(temp_adj))]  
        adjacency_matricies[[length(adjacency_matricies) + 1]] <- as.matrix(temp_adj)
      } else {
        
        temp_adj <- matrix(0, 
                           nrow = nrow(adjacency_matricies[[length(adjacency_matricies)]]), 
                           ncol = nrow(adjacency_matricies[[length(adjacency_matricies)]]))
        rownames(temp_adj) <- colnames(temp_adj) <- colnames(adjacency_matricies[[length(adjacency_matricies)]])
        adjacency_matricies[[length(adjacency_matricies) + 1]] <- temp_adj
      }
      
      
    }
    
    set.seed(9733)   
    community.blondel <- list()
    for (com in 1:length(adjacency_matricies))
    {
      try({g <- igraph::graph_from_adjacency_matrix(adjacency_matricies[[com]], mode = "undirected")
      
      comm <- igraph::cluster_louvain(
        g 
      )  
      
      
      community.blondel[[com]] <- data.frame(
        names = comm$names,
        group = comm$membership
      )  
      
      
      
      rm(comm)
      }, silent = T)
    }
    
    
    for (i in 1:length(community.blondel))
    {
      community.blondel[[i]]$year <- i + 1815
      if (i != 1)
      {
        community.blondel[[i]]$group <- community.blondel[[i]]$group + max(community.blondel[[i - 1]]$group) 
      }
    }
    
    community.blondel <- do.call(bind_rows, community.blondel)
    community.blondel$group <- as.numeric(as.factor(community.blondel$group))
    
    max_year <- data.table::setDT(community.blondel)[,.(max_year = max(year)), by = "group"]
    max_year <- arrange(
      max_year, 
      max_year
    ) %>% data.frame(.)
    community.blondel <- data.frame(community.blondel)
    group_names <- unique(community.blondel$group)
    edgelist <- data.frame(from = NA, to = NA, weight = NA)
    
    for (i in 1:nrow(max_year))
    {
      names_temp <- subset(community.blondel, group == max_year$group[i])$names
      year_temp  <- max_year$max_year[i] + 1
      temp_data <- subset(community.blondel, names %in% names_temp & year == year_temp)
      from <- max_year$group[i]
      to <- unique(temp_data$group)
      weights <- c()
      
      if (length(to) > 0)
      {
        for (z in 1:length(to))
        {
          union_data <- union(names_temp, subset(temp_data, group == to[z])$names)
          sender_vec <- if_else(union_data %in% names_temp, 1, 0)
          reciever_vec <- if_else(union_data %in% subset(temp_data, group == to[z])$names, 1, 0)
          
          weights[z] <- length(intersect(names_temp, subset(temp_data, group == to[z])$names))/length(union_data)
        }
        edgelist <- bind_rows(
          edgelist, 
          data.frame(
            from = rep(from, length(to)), 
            to = to, 
            weight = weights
          )
        ) %>% filter(
          !is.na(from)
        )
      }
    }
    edgelist$weight <- round(edgelist$weight, 2)
    group_matrix <- igraph::graph.data.frame(edgelist, directed = F)
    group_matrix <- igraph::as_adj(group_matrix, sparse = F)
    group_matrix <- group_matrix[ order(row.names(group_matrix)),order(colnames(group_matrix))]
    diag(group_matrix) <- 2
    group_matrix[is.na(group_matrix)] <- 0
    group_matrix[group_matrix < 0] <- 0
    
    
    g <- igraph::graph_from_adjacency_matrix(group_matrix, mode = "undirected", weighted = T)
    set.seed(477)
    comm <- igraph::cluster_louvain(g, weights = igraph::E(g)$weight)
    between.group <- data.frame(
      group = comm$names,
      interlayer = comm$membership
    )
    
    
    
    community.blondel <- community.blondel[,!(colnames(community.blondel) %in% c("interlayer", "n"))]
    community.blondel <- mutate(
      community.blondel,
      group = as.character(group)
    ) %>% left_join(
      between.group,
      by = c(
        "group"
      )
    ) 
    
    war.data.temp.test <- distinct(
      unweighted_data, 
      cowc1, cowc2, year, contig, fatal_mid, .keep_all = T
    ) %>% left_join(
      community.blondel,
      by = c(
        "cowc1" = "names", "year"
      )
    ) %>% left_join(
      community.blondel,
      by = c(
        "cowc2" = "names", "year"
      )
    ) %>% transform(
      dyad = paste(cowc1, cowc2, sep = "-"),
      contig = if_else(contig %in% 1:2, 1, 0),
      year_sqr = year ^ 2,
      out_group = if_else(interlayer.x != interlayer.y, 1, 0)
    )
    
    
    try({mode_temp <- alpaca::feglm(fatal_mid ~ out_group | ccode1 + ccode2 + year + dyad, data = war.data.temp.test)
    temp_outcome_data <- data.frame(
      out_group_fmids = try({coef(mode_temp)[1]}),     
      out_group_mids = try({summary(mode_temp, cluster = c("ccode1", "ccode2", "year", "dyad"))[[1]][4]}),
      layer_combo = layers, 
      formula_downweight = forms
    )
    
    outcome_data <- bind_rows(
      outcome_data,
      temp_outcome_data
    ) %>% distinct(
      layer_combo, formula_downweight, .keep_all = T
    )})
    rownames(outcome_data) <- NULL
    cat(paste0("\rFormula: ", forms, ", layer combo: ", layers))
  }
  saveRDS(outcome_data, file = paste0("loop_mods/avg_outcome_data_", forms, ".rds"))
}
